========================================================================= 
Log Path: /Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version/output/log/log_C_CompustatQuarterly.log 
Program Path: /Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version/code/0_DownloadData/3_CompustatQuarterly.R 
Working Directory: /Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version 
User Name: wenhao 
R Version: 4.4.0 (2024-04-24) 
Machine: Wenhaos-MacBook-2020.local x86_64 
Operating System: Darwin 23.4.0 Darwin Kernel Version 23.4.0: Wed Feb 21 21:44:31 PST 2024; root:xnu-10063.101.15~2/RELEASE_X86_64 
Base Packages: stats graphics grDevices utils datasets methods base 
Other Packages: tidylog_1.1.0 zoo_1.8-12 dtplyr_1.3.1 RPostgres_1.4.7 procs_1.0.6 reporter_1.4.4 libr_1.3.3 logr_1.3.8 fmtr_1.6.5 common_1.1.3 sassy_1.2.5 here_1.0.1 data.table_1.15.4 lubridate_1.9.3 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.2 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0 
Log Start Time: 2024-07-08 10:59:34.9739 
========================================================================= 

Warning: incomplete final line found on '/Users/wenhao/Dropbox/Research/Inverted Liquidity Crises/Data/Firm quality and financial availability - Wenhao's version/code/0_DownloadData/3_CompustatQuarterly.R' 

> library(tidyverse)
> library(data.table)
> library(dtplyr)
> library(lubridate)
> library(stringr)
> library(zoo)
> library(here)
> library(sassy)
> library(RPostgres)
> 
> # Open the log
> lf <- log_open(file.path(here("output", "log"), "log_C_CompustatQuarterly.log"),
>                autolog = T, show_notes = F)
> 
> 
> # Send code to the log
> log_code()
> 
> # 1 Read data from WRDS -------------------------------------------------------------------
> sep("1 Read data from WRDS")
> 
> wrds <- dbConnect(Postgres(),
>                   host='wrds-pgdata.wharton.upenn.edu',
>                   port=9737,
>                   dbname='wrds',
>                   sslmode='require',
>                   user='wenhao19')
> 
> SQL_statement <- 
>   "
>   SELECT a.gvkey, a.datadate, a.fyearq, a.fqtr, a.datacqtr, a.datafqtr, a.acoq,
> 		     a.actq,a.ajexq,a.apq,a.atq,a.ceqq,a.cheq,a.cogsq,a.cshoq,a.cshprq,
> 		     a.dlcq,a.dlttq,a.dpq,a.drcq,a.drltq,a.dvpsxq,a.dvpq,a.dvy,a.epspiq,a.epspxq,a.fopty,
> 		     a.gdwlq,a.ibq,a.invtq,a.intanq,a.ivaoq,a.lcoq,a.lctq,a.loq,a.ltq,a.mibq,
> 		     a.niq,a.oancfy,a.oiadpq,a.oibdpq,a.piq,a.ppentq,a.ppegtq,a.prstkcy,a.prccq,
> 		     a.pstkq,a.rdq,a.req,a.rectq,a.revtq,a.saleq,a.seqq,a.sstky,a.txdiq,
> 		     a.txditcq,a.txpq,a.txtq,a.xaccq,a.xintq,a.xsgaq,a.xrdq, a.capxy
>   FROM COMP.FUNDQ as a
> 	WHERE a.consol = 'C'
> 	AND a.popsrc = 'D'
> 	AND a.datafmt = 'STD'
> 	AND a.curcdq = 'USD'
> 	AND a.indfmt = 'INDL'
> "
> 
> res <- dbSendQuery(conn = wrds, statement = SQL_statement)
> df_CompustatQuarterly <- dbFetch(res)
> dbClearResult(res)
> 
> # 2 Cleaning -------------------------------------------------------------------
> sep("2 Cleaning")
> 
> # Convert the data frame to a data.table
> setDT(df_CompustatQuarterly)
> 
> # Keep only the most recent data for each fiscal quarter
> df_CompustatQuarterly <- df_CompustatQuarterly %>%
>   arrange(datadate) %>%
>   group_by(gvkey, fyearq, fqtr) %>%
>   slice(n()) %>%
>   ungroup() %>% 
>   as.data.table()
> 
> # Data availability assumed
> # Assume data available with a 3-month lag
> df_CompustatQuarterly <- df_CompustatQuarterly %>%
>   mutate(time_avail_m = as.Date(datadate) %m+% months(3)) %>% 
>   mutate(time_avail_m = year(time_avail_m) * 100 + month(time_avail_m))
> 
> # Patch cases with earlier data availability
> df_CompustatQuarterly <- df_CompustatQuarterly %>%
>   mutate(rdq_ym = year(rdq)*100+month(rdq))  %>% 
>   mutate(time_avail_m = ifelse(!is.na(rdq) & rdq_ym > time_avail_m, rdq_ym, time_avail_m))
> 
> # Drop cases with very late release
> df_CompustatQuarterly <- df_CompustatQuarterly %>% 
>   filter(!(as.numeric(rdq - datadate) > 180 & !is.na(rdq)))
> 
> # Keep the most recent information for each gvkey and time_avail_m
> df_CompustatQuarterly <- df_CompustatQuarterly %>%
>   arrange(datadate) %>%
>   group_by(gvkey, time_avail_m) %>%
>   slice(n()) %>%
>   ungroup() %>% 
>   as.data.table()
> 
> # Handling missing values assumed to be 0
> zero_cols <- c("acoq", "actq", "apq", "cheq", "dpq", "drcq", "invtq", "intanq", "ivaoq", 
>              "gdwlq", "lcoq", "lctq", "loq", "mibq", "prstkcy", "rectq", "sstky", "txditcq")
> 
> # Replace missing values with 0 in the specified columns using %>% (pipe)
> df_CompustatQuarterly <- df_CompustatQuarterly %>%
>   mutate(across(all_of(zero_cols), ~ifelse(is.na(.), 0, .)))
> 
> # Prepare year-to-date items
> df_CompustatQuarterly <- df_CompustatQuarterly %>% arrange(gvkey, fyearq, fqtr)
> 
> ytdVars <- c("sstky", "prstkcy", "oancfy", "fopty")
> 
> for (v in ytdVars) {
>   df_CompustatQuarterly <- df_CompustatQuarterly %>%
>     group_by(gvkey, fyearq) %>%
>     mutate(!!paste0(v, "q") := ifelse(fqtr == 1, .data[[v]], .data[[v]] - lag(.data[[v]]))) %>%
>     ungroup() %>%
>     as.data.table()
> }
> 
> # save
> fwrite(df_CompustatQuarterly, here("data", "Compustat", "q_QCompustat.csv"))
> saveRDS(df_CompustatQuarterly, here("data", "Compustat", "q_QCompustat.RDS"))
> 
> # 3 Monthly version -------------------------------------------------------------------
> sep("Monthly version")
> 
> # Expand to monthly
> 
> df_m_QCompustat <- df_CompustatQuarterly 
> setDT(df_m_QCompustat)
> 
> df_m_QCompustat <- df_m_QCompustat[, .SD[rep(1, 3)], by = .(gvkey, time_avail_m)]
> # Update 'time_avail_m' based on group-specific calculations
> df_m_QCompustat[, datadate2 := datadate %m+% months(0:(.N - 1)), by = .(gvkey, time_avail_m)]
> df_m_QCompustat[, datadate2 := as.Date(datadate2) %m+% months(3)] 
> df_m_QCompustat[, time_avail_m := year(datadate2)*100+month(datadate2)]
> 
> # A few obervation have two rows in the same month after expanding, keep the most recent obs
> df_m_QCompustat <- df_m_QCompustat %>%
>   arrange(datadate) %>%
>   group_by(gvkey, time_avail_m) %>%
>   slice(n()) %>%
>   ungroup() %>%
>   as.data.table()
> 
> df_m_QCompustat <- df_m_QCompustat %>%
>   #select(-tempTimeAvailM, -fyearq, -fqtr, -datacqtr, -datafqtr) %>%
>   rename(datadate = datadate2,
>          datadateq = datadate) 
> 
> 
> # save
> fwrite(df_m_QCompustat, here("data", "Compustat", "m_QCompustat.csv"))
> saveRDS(df_m_QCompustat, here("data", "Compustat", "m_QCompustat.RDS"))
> 
> # Close log
> log_close()
> 
> # View results
> writeLines(readLines(lf))

========================================================================= 
1 Read data from WRDS 
========================================================================= 

========================================================================= 
2 Cleaning 
========================================================================= 

group_by: 3 grouping variables (gvkey, fyearq, fqtr)

slice (grouped): removed 918 rows (<1%), 1,798,879 rows remaining (removed 0 groups, 1,798,879 groups remaining)

ungroup: no grouping variables remain

mutate: new variable 'time_avail_m' (Date) with 760 unique values and 0% NA

mutate: converted 'time_avail_m' from Date to double (0 new NA)

mutate: new variable 'rdq_ym' (double) with 638 unique values and 35% NA

mutate: changed 51,824 values (3%) of 'time_avail_m' (0 new NAs)

filter: removed 11,232 rows (1%), 1,787,647 rows remaining

group_by: 2 grouping variables (gvkey, time_avail_m)

slice (grouped): removed 2,906 rows (<1%), 1,784,741 rows remaining (removed 0 groups, 1,784,741 groups remaining)

ungroup: no grouping variables remain

mutate: changed 564,162 values (32%) of 'acoq' (564,162 fewer NAs)

        changed 678,682 values (38%) of 'actq' (678,682 fewer NAs)

        changed 475,703 values (27%) of 'apq' (475,703 fewer NAs)

        changed 459,423 values (26%) of 'cheq' (459,423 fewer NAs)

        changed 569,084 values (32%) of 'dpq' (569,084 fewer NAs)

        changed 1,350,372 values (76%) of 'drcq' (1,350,372 fewer NAs)

        changed 1,178,080 values (66%) of 'gdwlq' (1,178,080 fewer NAs)

        changed 494,236 values (28%) of 'invtq' (494,236 fewer NAs)

        changed 1,113,693 values (62%) of 'intanq' (1,113,693 fewer NAs)

        changed 1,737,084 values (97%) of 'ivaoq' (1,737,084 fewer NAs)

        changed 568,719 values (32%) of 'lcoq' (568,719 fewer NAs)

        changed 669,961 values (38%) of 'lctq' (669,961 fewer NAs)

        changed 458,960 values (26%) of 'loq' (458,960 fewer NAs)

        changed 514,076 values (29%) of 'mibq' (514,076 fewer NAs)

        changed 740,431 values (41%) of 'prstkcy' (740,431 fewer NAs)

        changed 496,776 values (28%) of 'rectq' (496,776 fewer NAs)

        changed 702,379 values (39%) of 'sstky' (702,379 fewer NAs)

        changed 627,550 values (35%) of 'txditcq' (627,550 fewer NAs)

group_by: 2 grouping variables (gvkey, fyearq)

mutate (grouped): new variable 'sstkyq' (double) with 105,477 unique values and <1% NA

ungroup: no grouping variables remain

group_by: 2 grouping variables (gvkey, fyearq)

mutate (grouped): new variable 'prstkcyq' (double) with 81,262 unique values and <1% NA

ungroup: no grouping variables remain

group_by: 2 grouping variables (gvkey, fyearq)

mutate (grouped): new variable 'oancfyq' (double) with 302,695 unique values and 47% NA

ungroup: no grouping variables remain

group_by: 2 grouping variables (gvkey, fyearq)

mutate (grouped): new variable 'foptyq' (double) with 44,522 unique values and 94% NA

ungroup: no grouping variables remain

========================================================================= 
Monthly version 
========================================================================= 

Error : object 'df_m_QCompustat' not found 
Traceback: 
[1] "5: eval(call(\"unlockBinding\", \".N\", parent.frame()))"           
[2] "4: `[.data.table`(.SD, rep(1, 3))"                                  
[3] "3: .SD[rep(1, 3)]"                                                  
[4] "2: `[.data.table`(df_m_QCompustat, , .SD[rep(1, 3)], by = .(gvkey, "
[5] "       time_avail_m))"                                              
[6] "1: df_m_QCompustat[, .SD[rep(1, 3)], by = .(gvkey, time_avail_m)]"  

========================================================================= 
Log End Time: 2024-07-08 11:10:02.411213 
Log Elapsed Time: 0 00:10:27 
========================================================================= 
